library("tidyverse")
library("data.table")
library("rtracklayer")
library("ggrastr")
library("DESeq2")
library("ggpubr")
library("wigglescout")
library("eulerr")
library("ggplot2")

# export 
result_folder = "../results/wigglescout/"

bws <- list.files("../data/CutNTag_ChIP-Seq/bw/",
                  full.names = TRUE)
# diff signal function
bw_granges_diff_analysis <- function(granges_c1,
                                     granges_c2,
                                     label_c1,
                                     label_c2,
                                     estimate_size_factors = FALSE,
                                     as_granges = FALSE) {
  # Bind first, get numbers after
  names_values <- NULL
  fields <- names(mcols(granges_c1))
  
  if ("name" %in% fields) {
    names_values <- mcols(granges_c1)[["name"]]
    granges_c1 <- granges_c1[, fields[fields != "name"]]
  }
  
  fields <- names(mcols(granges_c2))
  if ("name" %in% fields) {
    granges_c2 <- granges_c2[, fields[fields != "name"]]
  }
  
  cts_df <- cbind(data.frame(granges_c1), mcols(granges_c2))
  
  if (!is.null(names_values)) {
    rownames(cts_df) <- names_values
  }
  
  # Needs to drop non-complete cases and match rows
  complete <- complete.cases(cts_df)
  cts_df <- cts_df[complete, ]
  
  values_df <- cts_df[, 6:ncol(cts_df)] %>% dplyr::select(where(is.numeric))
  cts <- get_nreads_columns(values_df)
  
  condition_labels <- c(rep(label_c1, length(mcols(granges_c1))), rep(label_c2, length(mcols(granges_c2))))
  
  
  coldata <- data.frame(colnames(cts), condition = as.factor(condition_labels))
  print(coldata)
  
  dds <- DESeq2::DESeqDataSetFromMatrix(
    countData = cts,
    colData = coldata,
    design = ~ condition,
    rowRanges = granges_c1[complete, ]
  )
  
  
  if (estimate_size_factors == TRUE) {
    dds <- DESeq2::estimateSizeFactors(dds)
  }
  else {
    # Since files are scaled, we do not want to estimate size factors
    sizeFactors(dds) <- c(rep(1, ncol(cts)))
  }
  
  dds <- DESeq2::estimateDispersions(dds)
  dds <- DESeq2::nbinomWaldTest(dds)
  
  if (as_granges) {
    result <- DESeq2::results(dds, format = "GRanges", alpha = 0.01)
    if (!is.null(names_values)) {
      result$name <- names_values[complete]
    }
    
  }
  else {
    # result <- results(dds, format="DataFrame")
    result <- dds
  }
  
  result
}

get_nreads_columns <- function(df, length_factor = 100) {
  # Convert mean coverages to round integer read numbers
  cts <- as.matrix(df)
  cts <- as.matrix(cts[complete.cases(cts), ])
  cts <- round(cts * length_factor)
  cts
}
ctcf.and.G4.pro <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_with_promoters.sorted-CTCF_G4.bed")
ctcf.not.G4.pro <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_with_promoters.sorted-CTCFonly.bed")
ctcf.and.G4.npr <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_without_promoters.sorted-CTCF_G4.bed")
ctcf.not.G4.npr <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_without_promoters.sorted-CTCFonly.bed")

ctcf.and.G4.pro$class <- "CTCF_and_G4"
ctcf.not.G4.pro$class <- "CTCF_not_G4"
ctcf.and.G4.npr$class <- "CTCF_and_G4"
ctcf.not.G4.npr$class <- "CTCF_not_G4"
ctcf.and.G4.pro$pro <- "Pro"
ctcf.not.G4.pro$pro <- "Pro"
ctcf.and.G4.npr$pro <- "noPro"
ctcf.not.G4.npr$pro <- "noPro"

ctcf.and.G4 <- c(ctcf.and.G4.pro,ctcf.and.G4.npr)
ctcf.not.G4 <- c(ctcf.not.G4.pro,ctcf.not.G4.npr)
ctcf <- c(ctcf.and.G4,ctcf.not.G4)
ctcf <- sortSeqlevels(ctcf)
ctcf <- sort(ctcf)
names(ctcf) <- paste0(ctcf$class," ",ctcf$pro)
peaks_bed <- "../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories.bed"
export.bed(ctcf, "../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories.bed")
# Wulfridge CTCF ChIP-Seq mocks
mocks_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958387_GSM7116277_E14_Mock_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
          "../data/CutNTag_ChIP-Seq/bw/SRR23958386_GSM7116278_E14_Mock_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")

# PDS
# Wulfridge CTCF ChIP-Seq
pds_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958385_GSM7116279_E14_PDS_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
                "../data/CutNTag_ChIP-Seq/bw/SRR23958384_GSM7116280_E14_PDS_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")

pdc_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958383_GSM7116281_E14_PhenDC3_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
                "../data/CutNTag_ChIP-Seq/bw/SRR23958382_GSM7116282_E14_PhenDC3_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")

G4_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/G4_CnT_NT_batch3_R1.unique.bw")

cov.mocks <- bw_loci(mocks_bigwigs,peaks_bed,labels=c("mock_1","mock_2"))
cov.pds <- bw_loci(pds_bigwigs,peaks_bed,labels=c("PDS_1","PDS_2"))
cov.pdc <- bw_loci(pdc_bigwigs,peaks_bed,labels=c("PhenDC3_1","PhenDC3_2"))
cov.G4 <- bw_loci(G4_bigwigs,peaks_bed,labels=c("G4_NT"))
results <- data.frame(
  as.data.frame(cov.mocks),
  as.data.frame(cov.pds)[6:7],
  as.data.frame(cov.pdc)[6:7],
  as.data.frame(cov.G4)[6],
  raw.lfc.pds_1 = log2(cov.pds$PDS_1/cov.mocks$mock_1),
  raw.lfc.pds_2 = log2(cov.pds$PDS_2/cov.mocks$mock_2),
  raw.lfc.pdc_1 = log2(cov.pds$PDS_1/cov.mocks$mock_1),
  raw.lfc.pdc_2 = log2(cov.pdc$PhenDC3_2/cov.mocks$mock_2),
  raw.lfc.pds = log2(rowMeans(as.data.frame(cov.pds)[6:7])/rowMeans(as.data.frame(cov.mocks)[6:7])),
  raw.lfc.pdc = log2(rowMeans(as.data.frame(cov.pdc)[6:7])/rowMeans(as.data.frame(cov.mocks)[6:7])),
  mean.mock = rowMeans(as.data.frame(cov.mocks)[6:7]),
  mean.pds = rowMeans(as.data.frame(cov.pds)[6:7]),
  mean.pdc = rowMeans(as.data.frame(cov.pdc)[6:7])
  )

cov.mocks$name <- NULL
cov.pds$name <- NULL
cov.pdc$name <- NULL

de_pds <- bw_granges_diff_analysis(cov.mocks,cov.pds,"Mock","PDS")
converting counts to integer mode
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
lfc_pds = DESeq2::lfcShrink(de_pds, coef = "condition_PDS_vs_Mock", type = "apeglm")
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
    Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
    sequence count data: removing the noise and preserving large differences.
    Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
de_pdc <- bw_granges_diff_analysis(cov.mocks,cov.pdc,"Mock","PhenDC3")
converting counts to integer mode
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
lfc_pdc = DESeq2::lfcShrink(de_pdc, coef = "condition_PhenDC3_vs_Mock", type = "apeglm")
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
    Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
    sequence count data: removing the noise and preserving large differences.
    Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
results$deseq.lfc.pds <- results(de_pds)$log2FoldChange
results$deseq.lfcs.pds <- lfc_pds$log2FoldChange
results$deseq.padj.pds <- lfc_pds$padj
results$deseq.mean.pds <- log2(lfc_pds$baseMean)
results$deseq.sig.pds <- lfc_pds$pvalue < 0.05

results$deseq.lfc.pdc <- results(de_pdc)$log2FoldChange
results$deseq.lfcs.pdc <- lfc_pdc$log2FoldChange
results$deseq.padj.pdc <- lfc_pdc$padj
results$deseq.mean.pdc <- log2(lfc_pdc$baseMean)
results$deseq.sig.pdc <- lfc_pdc$pvalue < 0.05

results$class <- gsub(" .+","",results$name)
results$pro <- gsub(".+ ","",results$name)
results$pro <- factor(results$pro, levels=c("Pro","noPro"))
results$class <- factor(results$class,levels=c("CTCF_and_G4","CTCF_not_G4"))

results$log2.mock <- log2(results$mean.mock)
results$log2.pds <- log2(results$mean.pds)
results$log2.pdc <- log2(results$mean.pdc)
results$log2.G4 <- log2(results$G4_NT)

results$deseq.sigup.pds <- results$deseq.sig.pds & results$deseq.lfc.pds > 0
results$deseq.sigup.pdc <- results$deseq.sig.pdc & results$deseq.lfc.pdc > 0
write.table(results,"foldchange_results.txt")
table(results$name)

CTCF_and_G4 noPro   CTCF_and_G4 Pro CTCF_not_G4 noPro   CTCF_not_G4 Pro 
              988              1300             43309              6016 
results <- read.table("foldchange_results.txt")
results$class <- factor(results$class, levels=c("CTCF_and_G4","CTCF_not_G4"))
mypal <-c("cornflowerblue","orange","red2","darkgreen","#505050")
mypal <-c("#00619D","#A82A34","orange","seagreen","#505050")
mypal2 <-c("#00619D","#7FB0CE","#A82A34","#D39499","orange","#FFD55A","seagreen","#7DBA9C","#505050")
p <- ggviolin(results,x ="class",y="mean.mock",fill="class",palette=mypal,add="median_iqr") + coord_cartesian(ylim=c(0,10))
annotate_figure(p,fig.lab = "CTCF signal by class in mock condition, mean of two reps, median+iqr",fig.lab.size = 6)

ggsave("Violin_CTCF_classes.pdf",last_plot())
Saving 3 x 3 in image
p <- ggviolin(results,x ="pro",y="mean.mock",fill="class",palette=mypal,add="median_iqr") + coord_cartesian(ylim=c(0,10))
annotate_figure(p,fig.lab = "CTCF signal by class in mock condition, mean of two reps, median+iqr",fig.lab.size = 6)

ggsave("Violin_CTCF_classes_pro.pdf",last_plot())
Saving 3 x 3 in image
mdf <- reshape2::melt(dplyr::select(results,c("class","mock_1","mock_2","PDS_1","PDS_2","PhenDC3_1","PhenDC3_2")))
Using class as id variables
ggboxplot(mdf,x ="variable",y="value",fill="class",palette=mypal ) + coord_cartesian(ylim=c(0,10))

ggsave("Boxplot_CTCF_reps.pdf",last_plot())
Saving 5 x 3 in image
mdf <- reshape2::melt(dplyr::select(results,c("class","mock_1","mock_2","PDS_1","PDS_2","PhenDC3_1","PhenDC3_2")))
Using class as id variables
ggviolin(mdf,x ="variable",y="value",fill="class",palette=mypal,add="median_iqr") + coord_cartesian(ylim=c(0,10))

ggsave("Violin_CTCF_reps.pdf",last_plot())
Saving 5 x 3 in image
mdf <- reshape2::melt(dplyr::select(results,c("class","deseq.lfc.pds","deseq.lfc.pdc")))
Using class as id variables
p <- ggviolin(mdf,x ="variable",y="value",fill="class",palette=mypal, add="median_iqr") + geom_hline(yintercept=0,linetype = "dotted") + coord_cartesian(ylim=c(-2,2))
annotate_figure(p,fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr",fig.lab.size = 6)
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).

ggsave("Violin_CTCF_lfc.pdf",last_plot())
Saving 3 x 3 in image
p <- ggboxplot(mdf,x ="variable",y="value",fill="class",palette=mypal) + geom_hline(yintercept=0,linetype = "dotted") + coord_cartesian(ylim=c(-2,2))
annotate_figure(p,fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr",fig.lab.size = 6)
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_boxplot()`).

ggsave("Boxplot_CTCF_lfc.pdf",last_plot())
Saving 3 x 3 in image
mdf <- reshape2::melt(dplyr::select(results,c("pro","class","deseq.lfc.pds","deseq.lfc.pdc")))
Using pro, class as id variables
p <- ggviolin(mdf,x ="pro",y="value",fill="class",palette=mypal, add="median_iqr", facet.by="variable") + geom_hline(yintercept=0,linetype = "dotted") + coord_cartesian(ylim=c(-2,2))
annotate_figure(p,fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr",fig.lab.size = 6)
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).

ggsave("Violin_CTCF_lfc_pro.pdf",last_plot())
Saving 5 x 3 in image
compare_means(raw.lfc.pds ~ class,data = results)
compare_means(raw.lfc.pdc ~ class,data = results,)
mdf <- reshape2::melt(dplyr::select(results,c("class","raw.lfc.pds_1","raw.lfc.pds_2","raw.lfc.pdc_1","raw.lfc.pdc_2")))
Using class as id variables
ggviolin(mdf,x ="variable",y="value",fill="class",palette=mypal, add="median_iqr") + geom_hline(yintercept=0,linetype = "dotted") + coord_cartesian(ylim=c(-4,4))
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_summary()`).

ggsave("Violin_CTCF_lfc_individual_reps.pdf",last_plot())
Saving 5 x 3 in image
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_summary()`).
ggdensity(results,x="raw.lfc.pds",color="class", fill="class",palette=mypal) + geom_vline(xintercept = 0, linetype ="dotted")
Warning: Removed 26 rows containing non-finite outside the scale range (`stat_density()`).

ggdensity(results,x="raw.lfc.pdc",color="class", fill="class",palette=mypal) + geom_vline(xintercept = 0, linetype ="dotted")
Warning: Removed 23 rows containing non-finite outside the scale range (`stat_density()`).

ggscatterhist(results,x ="log2.mock",y="log2.pds",size = 0.2, alpha=0.1,color="deseq.sig.pds",margin.params = list(fill="class",color="black",size=0.2),palette=mypal[c(5,2)])
Warning: Removed 15 rows containing non-finite outside the scale range (`stat_density()`).
Warning: Removed 12 rows containing non-finite outside the scale range (`stat_density()`).

ggsave("Scatter_CTCF_DESEq_PDSvsMock.pdf",last_plot())
Saving 4 x 4 in image
ggscatterhist(results,x ="log2.mock",y="log2.pdc",size = 0.2, alpha=0.1,color="deseq.sig.pds",margin.params = list(fill="class",color="black",size=0.2),palette=mypal[c(5,2)])
Warning: Removed 15 rows containing non-finite outside the scale range (`stat_density()`).
Warning: Removed 8 rows containing non-finite outside the scale range (`stat_density()`).

ggsave("Scatter_CTCF_DESEq_PhenDC3vsMock.pdf",last_plot())
Saving 4 x 4 in image
table(results$deseq.sig.pds & (results$deseq.lfc.pds > 0),results$pro)
       
        noPro   Pro
  FALSE 42094  6938
  TRUE   2200   378
table(results$deseq.sig.pds & (results$deseq.lfc.pds > 0),results$class)
       
        CTCF_and_G4 CTCF_not_G4
  FALSE        2220       46812
  TRUE           68        2510
mdf<-reshape2::melt(table(results$deseq.sig.pds & (results$deseq.lfc.pds > 0),results$class))
ggplot(mdf,aes(Var1, Var2, fill= value)) + 
  geom_tile(show.legend = F) + geom_text(aes(label=value)) +
  scale_fill_gradient(low="white", high="orange") + theme_minimal()

vl <- list(sig=grep("TRUE",results$deseq.sigup.pds),CTCF_G4=grep("and",results$class),CTCFonly=grep("not",results$class))
plot(euler(vl),quantities=T)

table(results$deseq.sig.pdc & (results$deseq.lfc.pdc > 0),results$class)
       
        CTCF_and_G4 CTCF_not_G4
  FALSE        2251       48545
  TRUE           37         780
mdf<-reshape2::melt(table(results$deseq.sig.pdc & (results$deseq.lfc.pdc > 0),results$class))
ggplot(mdf,aes(Var1, Var2, fill= value)) + 
  geom_tile(show.legend = F) + geom_text(aes(label=value)) +
  scale_fill_gradient(low="white", high="orange") + theme_minimal()

results$uid <- seq(1:nrow(results))
vl <- list(sig=grep("TRUE",results$deseq.sigup.pdc),CTCF_G4=grep("and",results$class),CTCFonly=grep("not",results$class))
plot(euler(vl),quantities=T)

table(results$deseq.sigup.pds,results$deseq.sigup.pdc)
       
        FALSE  TRUE
  FALSE 48576   456
  TRUE   2217   361
mdf<-reshape2::melt(table(results$deseq.sigup.pds,results$deseq.sigup.pdc))
ggplot(mdf,aes(Var1, Var2, fill= value)) + 
  geom_tile(show.legend = F) + geom_text(aes(label=value)) +
  scale_fill_gradient(low="white", high="orange") + theme_minimal()

vl <- list(sig_PDS=grep("TRUE",results$deseq.sigup.pds),sig_PDC=grep("TRUE",results$deseq.sigup.pdc),CTCF_G4=grep("and",results$class),CTCFonly=grep("not",results$class))
plot(euler(vl),quantities=T)

results$uid <- seq(1:nrow(results))
vl <- list(sig_PDS=grep("TRUE",results$deseq.sigup.pds),sig_PDC=grep("TRUE",results$deseq.sigup.pdc),CTCF_G4=grep("and",results$class))
plot(euler(vl),quantities=T)

results$sig.by.class.pds <- paste0(results$deseq.sigup.pds,"_",results$class)
results$psize <- 0.01
results$psize[results$deseq.sigup.pds & results$class=="CTCF_and_G4"] <- 1

ggscatter(results,x ="log2.mock",y="log2.pds",size = 0.5, alpha=results$psize,color = "sig.by.class.pds",palette=c(mypal[5],mypal[5],mypal[5],mypal[2],mypal[1]))

ggscatter(results[!grepl("NA",results$sig.by.class.pds),],x ="log2.mock",y="deseq.lfc.pds",size = 0.2, alpha="psize",color = "sig.by.class.pds",palette=c("#505050","#505050","red2","blue"))

ggscatterhist(results[!grepl("NA",results$sig.by.class.pds),],x ="log2.mock",y="log2.pds",size = 0.4, alpha="psize",color = "sig.by.class.pds",margin.params = list(fill="sig.by.class.pds",color="black",size=0.2),palette=c("#505050","#505050","red2","blue"))
Warning: Removed 13 rows containing non-finite outside the scale range (`stat_density()`).
Warning: Removed 10 rows containing non-finite outside the scale range (`stat_density()`).

ggscatter(results,x ="raw.lfc.pds",y="log2.G4",size = 0.2, alpha=0.1,color="deseq.sigup.pds")
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_point()`).

results$G4.quantile <- cut_number(results$G4_NT, n = 5,labels=F)
ggstripchart(results,x="G4.quantile", y="deseq.lfc.pds",color=mypal[2],add=c("violin","median_iqr"),add.params=list(color="black",fill=mypal[1]),size=0.2, alpha=results$deseq.sig.pds/4, palette=mypal) + coord_cartesian(ylim=c(-3,3)) + geom_hline(yintercept = 0, linetype="dotted")
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_point()`).

ggsave("Violin_CTCF_lfc_by_G4quantile_sig_highlighted.pdf")
Saving 3 x 3 in image
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_point()`).
table(results$G4.quantile,results$deseq.sig.pds)
   
    FALSE TRUE
  1  9565  757
  2  9759  561
  3  9794  529
  4  9916  406
  5  9988  335
ggviolin(results,x="G4.quantile", y="deseq.lfc.pds",fill=mypal[1],add="median_iqr") + coord_cartesian(ylim=c(-2,2)) + geom_hline(yintercept = 0, linetype="dotted")
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).

ggsave("Violin_CTCF_lfc_by_G4quantile.pdf")
Saving 3 x 3 in image
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).
ggviolin(results,x="G4.quantile", y="log2.G4",fill=mypal2[5],add="median_iqr") + geom_hline(yintercept = 0, linetype="dotted")
Warning: Removed 9483 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 9483 rows containing non-finite outside the scale range (`stat_summary()`).

ggsave("Violin_G4_by_G4quantile.pdf")
Saving 3 x 3 in image
Warning: Removed 9483 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 9483 rows containing non-finite outside the scale range (`stat_summary()`).
peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile("../data/CutNTag_ChIP-Seq/bw/GSM6634325_E14_Mock_G4.bw",peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 2284 generated ( 0.0527909395585346 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 324 generated ( 0.053919121318023 per locus)

peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile(G4_bigwigs,peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal)

plot_bw_profile(mocks_bigwigs[1],peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 2291 generated ( 0.0529527331561308 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 306 generated ( 0.0509236145781328 per locus)

p1 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[1]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
p2 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[2]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
p3 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[3]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 805 generated ( 0.0186062637235641 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 805 generated ( 0.0186062637235641 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 810 generated ( 0.0187218305789899 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 810 generated ( 0.0187218305789899 per locus)
p4 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[4]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
ggarrange(p1,p2,p3,p4,ncol = 4,nrow = 1)

p1 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[1]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
p2 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[2]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
p3 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[3]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 805 generated ( 0.0186062637235641 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 805 generated ( 0.0186062637235641 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 810 generated ( 0.0187218305789899 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 810 generated ( 0.0187218305789899 per locus)
p4 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[4]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
ggarrange(p1,p2,p3,p4,ncol = 4,nrow = 1)

cen we learn something from the genes with top-most increase in CTCF?

sigup.pds <- results[results$deseq.sigup.pds,]
sigup.pdc <- results[results$deseq.sigup.pdc,]

#export.bed(sigup.pds,"peaks_sigup_pds.bed")
#export.bed(sigup.pds,"peaks_sigup_pds.bed")
gghistogram(results,"width", add="median", fill="class")
Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.

Overlap analysis

G4_bed <- import('../data/G4_CnT/G4_WT_peaks.narrowPeak')
ATAC_bed <- import('../data/ATAC-seq/ATAC_seq_mESC_Martire_peaks.narrowPeak')
pro_bed <- import('../data/regions/promoters_geneSymbol.mm10.bed')

G4_bed <- bedscout::annotate_overlapping_features(G4_bed,pro_bed,name_field = "name")

G4_bed$name <- "G4"
G4_bed$name[!is.na(G4_bed$nearby_features)] <- "G4pro"
G4_bed$name[grepl("pro",G4_bed$name)] <- "G4pro"
nearest_G4_1kb <- bedscout::annotate_nearby_features(ctcf,G4_bed,distance_cutoff = 1000,ignore.strand = T,name_field = "name")
nearest_G4_2.5kb <- bedscout::annotate_nearby_features(ctcf,G4_bed,distance_cutoff = 2500,ignore.strand = T,name_field = "name")
nearest_G4_5kb <- bedscout::annotate_nearby_features(ctcf,G4_bed,distance_cutoff = 5000,ignore.strand = T,name_field = "name")
nearest_G4_10kb <- bedscout::annotate_nearby_features(ctcf,G4_bed,distance_cutoff = 10000,ignore.strand = T,name_field = "name")
nearest_G4_50kb <- bedscout::annotate_nearby_features(ctcf,G4_bed,distance_cutoff = 50000,ignore.strand = T,name_field = "name")

ctcf$nearest_G4 <- factor(">50kb", levels=c("<1kb","<2.5kb","<5kb","<10kb","<50kb",">50kb"))
ctcf$nearest_G4[!is.na(nearest_G4_50kb$nearby_features)] <- "<50kb"
ctcf$nearest_G4[!is.na(nearest_G4_10kb$nearby_features)] <- "<10kb"
ctcf$nearest_G4[!is.na(nearest_G4_5kb$nearby_features)] <- "<5kb"
ctcf$nearest_G4[!is.na(nearest_G4_2.5kb$nearby_features)] <- "<2.5kb"
ctcf$nearest_G4[!is.na(nearest_G4_1kb$nearby_features)] <- "<1kb"

ctcf$nearest_G4_type <- "none"
ctcf$nearest_G4_type[!is.na(nearest_G4_50kb$nearby_features)] <- nearest_G4_50kb$nearby_features[!is.na(nearest_G4_50kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_10kb$nearby_features)] <- nearest_G4_10kb$nearby_features[!is.na(nearest_G4_10kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_5kb$nearby_features)] <- nearest_G4_5kb$nearby_features[!is.na(nearest_G4_5kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_2.5kb$nearby_features)] <- nearest_G4_2.5kb$nearby_features[!is.na(nearest_G4_2.5kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_1kb$nearby_features)] <- nearest_G4_1kb$nearby_features[!is.na(nearest_G4_1kb$nearby_features)]

ctcf$nearest_G4_type[grep("pro",ctcf$nearest_G4_type)] <- "G4pro"

results$nearest_G4 <- ctcf$nearest_G4
results$nearest_G4_type <- ctcf$nearest_G4_type
table(results$nearest_G4)

  <1kb <2.5kb   <5kb  <10kb  <50kb  >50kb 
  3540   1558   2350   4285  20140  19740 
table(results$nearest_G4_type)

   G4 G4pro  none 
11457 20416 19740 
table(results$nearest_G4, results$nearest_G4_type)
        
            G4 G4pro  none
  <1kb    1426  2114     0
  <2.5kb   581   977     0
  <5kb     948  1402     0
  <10kb   1675  2610     0
  <50kb   6827 13313     0
  >50kb      0     0 19740
ggviolin(results,x="nearest_G4", y="mean.mock",fill=mypal[1],add="median_iqr") + coord_cartesian(ylim=c(0,10)) + geom_hline(yintercept = 0, linetype="dotted")

ggsave("Violin_CTCF_signal_by_G4distance.pdf")
Saving 3 x 3 in image
ggviolin(results,x="nearest_G4", y="deseq.lfc.pds",fill=mypal[1],add="median_iqr") + coord_cartesian(ylim=c(-2,2)) + geom_hline(yintercept = 0,linewidth = 0.2 ) + geom_hline(yintercept = median(results$deseq.lfc.pds[results$nearest_G4=="<1kb"]), linetype="dotted",linewidth = 0.2)
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).

ggsave("Violin_CTCF_PDS_lfc_by_G4distance.pdf")
Saving 3 x 3 in image
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).
compare_means(deseq.lfc.pds ~ nearest_G4, results)
ggviolin(results,x="nearest_G4", y="deseq.lfc.pdc",fill=mypal[1],add="median_iqr") + coord_cartesian(ylim=c(-2,2)) + geom_hline(yintercept = 0,linewidth = 0.2 ) + geom_hline(yintercept = mean(results$deseq.lfc.pdc[results$nearest_G4=="<1kb"]), linetype="dotted",linewidth = 0.2)

ggsave("Violin_CTCF_PhenDC3_lfc_by_G4distance.pdf")
Saving 3 x 3 in image
compare_means(deseq.lfc.pdc ~ nearest_G4, results)
ggviolin(results,x="nearest_G4", y="deseq.lfc.pds",fill="nearest_G4_type",palette=mypal[c(1,3,5)],add="median_iqr") + coord_cartesian(ylim=c(-2,2)) + geom_hline(yintercept = 0,linewidth = 0.2 ) + geom_hline(yintercept = mean(results$deseq.lfc.pds[results$nearest_G4=="<1kb"]), linetype="dotted",linewidth = 0.2)
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).

ggsave("Violin_CTCF_PDS_lfc_by_G4distance_pro.pdf")
Saving 5 x 3 in image
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$nearest_G4_type=="G4",])
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$nearest_G4_type=="G4pro",])
ggviolin(results,x="nearest_G4", y="deseq.lfc.pdc",fill="nearest_G4_type",palette=mypal[c(1,3,5)],add="median_iqr") + coord_cartesian(ylim=c(-2,2)) + geom_hline(yintercept = 0,linewidth = 0.2) + geom_hline(yintercept = mean(results$deseq.lfc.pdc[results$nearest_G4=="<1kb"]), linetype="dotted",linewidth = 0.2)

ggsave("Violin_CTCF_PhenDC3_lfc_by_G4distance_pro.pdf")
Saving 5 x 3 in image
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$nearest_G4_type=="G4",])
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$nearest_G4_type=="G4pro",])
peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile("../data/regions/G4.mm10.plus.bw",peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, colors=mypal)

peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile("../data/regions/G4.mm10.minus.bw",peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, colors=mypal)

peak_cats_mm9 <- bedscout::import_named_bed_into_list('../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories.mm9lift.bed')
plot_bw_profile("~/Google Drive/Simon/Manuscripts/Puck/github/genome/mm9/predG4.mm9.bw",peak_cats_mm9,labels=c(peak_cats_mm9[[1]][1,]$name,peak_cats_mm9[[2]][1,]$name,peak_cats_mm9[[3]][1,]$name,peak_cats_mm9[[4]][1,]$name),mode="center",show_error = T,verbose=F, colors=mypal)

ggsave("Profile_predicted_G4_motif.pdf")
Saving 7 x 7 in image
ggprofile("Profile_predicted_G4_motif.pdf")
Error in ggprofile("Profile_predicted_G4_motif.pdf") : 
  could not find function "ggprofile"
---
title: "R Notebook"
output: html_notebook
---


```{r}
library("tidyverse")
library("data.table")
library("rtracklayer")
library("ggrastr")
library("DESeq2")
library("ggpubr")
library("wigglescout")
library("eulerr")
library("ggplot2")

# export 
result_folder = "../results/wigglescout/"

bws <- list.files("../data/CutNTag_ChIP-Seq/bw/",
                  full.names = TRUE)

```

```{r}
# diff signal function
bw_granges_diff_analysis <- function(granges_c1,
                                     granges_c2,
                                     label_c1,
                                     label_c2,
                                     estimate_size_factors = FALSE,
                                     as_granges = FALSE) {
  # Bind first, get numbers after
  names_values <- NULL
  fields <- names(mcols(granges_c1))
  
  if ("name" %in% fields) {
    names_values <- mcols(granges_c1)[["name"]]
    granges_c1 <- granges_c1[, fields[fields != "name"]]
  }
  
  fields <- names(mcols(granges_c2))
  if ("name" %in% fields) {
    granges_c2 <- granges_c2[, fields[fields != "name"]]
  }
  
  cts_df <- cbind(data.frame(granges_c1), mcols(granges_c2))
  
  if (!is.null(names_values)) {
    rownames(cts_df) <- names_values
  }
  
  # Needs to drop non-complete cases and match rows
  complete <- complete.cases(cts_df)
  cts_df <- cts_df[complete, ]
  
  values_df <- cts_df[, 6:ncol(cts_df)] %>% dplyr::select(where(is.numeric))
  cts <- get_nreads_columns(values_df)
  
  condition_labels <- c(rep(label_c1, length(mcols(granges_c1))), rep(label_c2, length(mcols(granges_c2))))
  
  
  coldata <- data.frame(colnames(cts), condition = as.factor(condition_labels))
  print(coldata)
  
  dds <- DESeq2::DESeqDataSetFromMatrix(
    countData = cts,
    colData = coldata,
    design = ~ condition,
    rowRanges = granges_c1[complete, ]
  )
  
  
  if (estimate_size_factors == TRUE) {
    dds <- DESeq2::estimateSizeFactors(dds)
  }
  else {
    # Since files are scaled, we do not want to estimate size factors
    sizeFactors(dds) <- c(rep(1, ncol(cts)))
  }
  
  dds <- DESeq2::estimateDispersions(dds)
  dds <- DESeq2::nbinomWaldTest(dds)
  
  if (as_granges) {
    result <- DESeq2::results(dds, format = "GRanges", alpha = 0.01)
    if (!is.null(names_values)) {
      result$name <- names_values[complete]
    }
    
  }
  else {
    # result <- results(dds, format="DataFrame")
    result <- dds
  }
  
  result
}

get_nreads_columns <- function(df, length_factor = 100) {
  # Convert mean coverages to round integer read numbers
  cts <- as.matrix(df)
  cts <- as.matrix(cts[complete.cases(cts), ])
  cts <- round(cts * length_factor)
  cts
}
```

```{r}
ctcf.and.G4.pro <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_with_promoters.sorted-CTCF_G4.bed")
ctcf.not.G4.pro <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_with_promoters.sorted-CTCFonly.bed")
ctcf.and.G4.npr <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_without_promoters.sorted-CTCF_G4.bed")
ctcf.not.G4.npr <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_without_promoters.sorted-CTCFonly.bed")

ctcf.and.G4.pro$class <- "CTCF_and_G4"
ctcf.not.G4.pro$class <- "CTCF_not_G4"
ctcf.and.G4.npr$class <- "CTCF_and_G4"
ctcf.not.G4.npr$class <- "CTCF_not_G4"
ctcf.and.G4.pro$pro <- "Pro"
ctcf.not.G4.pro$pro <- "Pro"
ctcf.and.G4.npr$pro <- "noPro"
ctcf.not.G4.npr$pro <- "noPro"

ctcf.and.G4 <- c(ctcf.and.G4.pro,ctcf.and.G4.npr)
ctcf.not.G4 <- c(ctcf.not.G4.pro,ctcf.not.G4.npr)
ctcf <- c(ctcf.and.G4,ctcf.not.G4)
ctcf <- sortSeqlevels(ctcf)
ctcf <- sort(ctcf)
names(ctcf) <- paste0(ctcf$class," ",ctcf$pro)
peaks_bed <- "../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories.bed"
export.bed(ctcf, "../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories.bed")
```

```{r}
# Wulfridge CTCF ChIP-Seq mocks
mocks_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958387_GSM7116277_E14_Mock_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
          "../data/CutNTag_ChIP-Seq/bw/SRR23958386_GSM7116278_E14_Mock_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")

# PDS
# Wulfridge CTCF ChIP-Seq
pds_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958385_GSM7116279_E14_PDS_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
                "../data/CutNTag_ChIP-Seq/bw/SRR23958384_GSM7116280_E14_PDS_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")

pdc_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958383_GSM7116281_E14_PhenDC3_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
                "../data/CutNTag_ChIP-Seq/bw/SRR23958382_GSM7116282_E14_PhenDC3_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")

G4_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/G4_CnT_NT_batch3_R1.unique.bw")

cov.mocks <- bw_loci(mocks_bigwigs,peaks_bed,labels=c("mock_1","mock_2"))
cov.pds <- bw_loci(pds_bigwigs,peaks_bed,labels=c("PDS_1","PDS_2"))
cov.pdc <- bw_loci(pdc_bigwigs,peaks_bed,labels=c("PhenDC3_1","PhenDC3_2"))
cov.G4 <- bw_loci(G4_bigwigs,peaks_bed,labels=c("G4_NT"))
```


```{r}
results <- data.frame(
  as.data.frame(cov.mocks),
  as.data.frame(cov.pds)[6:7],
  as.data.frame(cov.pdc)[6:7],
  as.data.frame(cov.G4)[6],
  raw.lfc.pds_1 = log2(cov.pds$PDS_1/cov.mocks$mock_1),
  raw.lfc.pds_2 = log2(cov.pds$PDS_2/cov.mocks$mock_2),
  raw.lfc.pdc_1 = log2(cov.pds$PDS_1/cov.mocks$mock_1),
  raw.lfc.pdc_2 = log2(cov.pdc$PhenDC3_2/cov.mocks$mock_2),
  raw.lfc.pds = log2(rowMeans(as.data.frame(cov.pds)[6:7])/rowMeans(as.data.frame(cov.mocks)[6:7])),
  raw.lfc.pdc = log2(rowMeans(as.data.frame(cov.pdc)[6:7])/rowMeans(as.data.frame(cov.mocks)[6:7])),
  mean.mock = rowMeans(as.data.frame(cov.mocks)[6:7]),
  mean.pds = rowMeans(as.data.frame(cov.pds)[6:7]),
  mean.pdc = rowMeans(as.data.frame(cov.pdc)[6:7])
  )

cov.mocks$name <- NULL
cov.pds$name <- NULL
cov.pdc$name <- NULL

de_pds <- bw_granges_diff_analysis(cov.mocks,cov.pds,"Mock","PDS")
lfc_pds = DESeq2::lfcShrink(de_pds, coef = "condition_PDS_vs_Mock", type = "apeglm")

de_pdc <- bw_granges_diff_analysis(cov.mocks,cov.pdc,"Mock","PhenDC3")
lfc_pdc = DESeq2::lfcShrink(de_pdc, coef = "condition_PhenDC3_vs_Mock", type = "apeglm")

results$deseq.lfc.pds <- results(de_pds)$log2FoldChange
results$deseq.lfcs.pds <- lfc_pds$log2FoldChange
results$deseq.padj.pds <- lfc_pds$padj
results$deseq.mean.pds <- log2(lfc_pds$baseMean)
results$deseq.sig.pds <- lfc_pds$pvalue < 0.05

results$deseq.lfc.pdc <- results(de_pdc)$log2FoldChange
results$deseq.lfcs.pdc <- lfc_pdc$log2FoldChange
results$deseq.padj.pdc <- lfc_pdc$padj
results$deseq.mean.pdc <- log2(lfc_pdc$baseMean)
results$deseq.sig.pdc <- lfc_pdc$pvalue < 0.05

results$class <- gsub(" .+","",results$name)
results$pro <- gsub(".+ ","",results$name)
results$pro <- factor(results$pro, levels=c("Pro","noPro"))
results$class <- factor(results$class,levels=c("CTCF_and_G4","CTCF_not_G4"))

results$log2.mock <- log2(results$mean.mock)
results$log2.pds <- log2(results$mean.pds)
results$log2.pdc <- log2(results$mean.pdc)
results$log2.G4 <- log2(results$G4_NT)

results$deseq.sigup.pds <- results$deseq.sig.pds & results$deseq.lfc.pds > 0
results$deseq.sigup.pdc <- results$deseq.sig.pdc & results$deseq.lfc.pdc > 0
```

```{r}
write.table(results,"foldchange_results.txt")
table(results$name)
```

```{r}
results <- read.table("foldchange_results.txt")
results$class <- factor(results$class, levels=c("CTCF_and_G4","CTCF_not_G4"))
```

```{r}
mypal <-c("cornflowerblue","orange","red2","darkgreen","#505050")
mypal <-c("#00619D","#A82A34","orange","seagreen","#505050")
mypal2 <-c("#00619D","#7FB0CE","#A82A34","#D39499","orange","#FFD55A","seagreen","#7DBA9C","#505050")
```

```{r fig.width=3, fig.height=3}
p <- ggviolin(results,x ="class",y="mean.mock",fill="class",palette=mypal,add="median_iqr") + coord_cartesian(ylim=c(0,10))
annotate_figure(p,fig.lab = "CTCF signal by class in mock condition, mean of two reps, median+iqr",fig.lab.size = 6)
ggsave("Violin_CTCF_classes.pdf",last_plot())
```
```{r fig.width=3, fig.height=3}
p <- ggviolin(results,x ="pro",y="mean.mock",fill="class",palette=mypal,add="median_iqr") + coord_cartesian(ylim=c(0,10))
annotate_figure(p,fig.lab = "CTCF signal by class in mock condition, mean of two reps, median+iqr",fig.lab.size = 6)
ggsave("Violin_CTCF_classes_pro.pdf",last_plot())
```

```{r fig.width=5, fig.height=3}
mdf <- reshape2::melt(dplyr::select(results,c("class","mock_1","mock_2","PDS_1","PDS_2","PhenDC3_1","PhenDC3_2")))
ggboxplot(mdf,x ="variable",y="value",fill="class",palette=mypal ) + coord_cartesian(ylim=c(0,10))
ggsave("Boxplot_CTCF_reps.pdf",last_plot())
```

```{r fig.width=5, fig.height=3}
mdf <- reshape2::melt(dplyr::select(results,c("class","mock_1","mock_2","PDS_1","PDS_2","PhenDC3_1","PhenDC3_2")))
ggviolin(mdf,x ="variable",y="value",fill="class",palette=mypal,add="median_iqr") + coord_cartesian(ylim=c(0,10))
ggsave("Violin_CTCF_reps.pdf",last_plot())
```
```{r fig.width=3, fig.height=3}
mdf <- reshape2::melt(dplyr::select(results,c("class","deseq.lfc.pds","deseq.lfc.pdc")))
p <- ggviolin(mdf,x ="variable",y="value",fill="class",palette=mypal, add="median_iqr") + geom_hline(yintercept=0,linetype = "dotted") + coord_cartesian(ylim=c(-2,2))
annotate_figure(p,fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr",fig.lab.size = 6)
ggsave("Violin_CTCF_lfc.pdf",last_plot())
```
```{r fig.width=3, fig.height=3}
p <- ggboxplot(mdf,x ="variable",y="value",fill="class",palette=mypal) + geom_hline(yintercept=0,linetype = "dotted") + coord_cartesian(ylim=c(-2,2))
annotate_figure(p,fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr",fig.lab.size = 6)
ggsave("Boxplot_CTCF_lfc.pdf",last_plot())
```

```{r fig.width=5, fig.height=3}
mdf <- reshape2::melt(dplyr::select(results,c("pro","class","deseq.lfc.pds","deseq.lfc.pdc")))
p <- ggviolin(mdf,x ="pro",y="value",fill="class",palette=mypal, add="median_iqr", facet.by="variable") + geom_hline(yintercept=0,linetype = "dotted") + coord_cartesian(ylim=c(-2,2))
annotate_figure(p,fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr",fig.lab.size = 6)
ggsave("Violin_CTCF_lfc_pro.pdf",last_plot())
```

```{r}
compare_means(raw.lfc.pds ~ class,data = results)
```

```{r}
compare_means(raw.lfc.pdc ~ class,data = results,)
```

```{r fig.width=5, fig.height=3}
mdf <- reshape2::melt(dplyr::select(results,c("class","raw.lfc.pds_1","raw.lfc.pds_2","raw.lfc.pdc_1","raw.lfc.pdc_2")))
ggviolin(mdf,x ="variable",y="value",fill="class",palette=mypal, add="median_iqr") + geom_hline(yintercept=0,linetype = "dotted") + coord_cartesian(ylim=c(-4,4))
ggsave("Violin_CTCF_lfc_individual_reps.pdf",last_plot())
```

```{r fig.width=3, fig.height=3}
ggdensity(results,x="raw.lfc.pds",color="class", fill="class",palette=mypal) + geom_vline(xintercept = 0, linetype ="dotted")
```

```{r fig.width=3, fig.height=3}
ggdensity(results,x="raw.lfc.pdc",color="class", fill="class",palette=mypal) + geom_vline(xintercept = 0, linetype ="dotted")
```


```{r fig.width=4, fig.height=4}
ggscatterhist(results,x ="log2.mock",y="log2.pds",size = 0.2, alpha=0.1,color="deseq.sig.pds",margin.params = list(fill="class",color="black",size=0.2),palette=mypal[c(5,2)])
ggsave("Scatter_CTCF_DESEq_PDSvsMock.pdf",last_plot())
```
```{r fig.width=4, fig.height=4}
ggscatterhist(results,x ="log2.mock",y="log2.pdc",size = 0.2, alpha=0.1,color="deseq.sig.pds",margin.params = list(fill="class",color="black",size=0.2),palette=mypal[c(5,2)])
ggsave("Scatter_CTCF_DESEq_PhenDC3vsMock.pdf",last_plot())
```

```{r}
table(results$deseq.sig.pds & (results$deseq.lfc.pds > 0),results$pro)
```

```{r fig.width=3, fig.height=2.5}
table(results$deseq.sig.pds & (results$deseq.lfc.pds > 0),results$class)
mdf<-reshape2::melt(table(results$deseq.sig.pds & (results$deseq.lfc.pds > 0),results$class))
ggplot(mdf,aes(Var1, Var2, fill= value)) + 
  geom_tile(show.legend = F) + geom_text(aes(label=value)) +
  scale_fill_gradient(low="white", high="orange") + theme_minimal()
```

```{r}
vl <- list(sig=grep("TRUE",results$deseq.sigup.pds),CTCF_G4=grep("and",results$class),CTCFonly=grep("not",results$class))
plot(euler(vl),quantities=T)
```

```{r fig.width=3, fig.height=2.5}
table(results$deseq.sig.pdc & (results$deseq.lfc.pdc > 0),results$class)
mdf<-reshape2::melt(table(results$deseq.sig.pdc & (results$deseq.lfc.pdc > 0),results$class))
ggplot(mdf,aes(Var1, Var2, fill= value)) + 
  geom_tile(show.legend = F) + geom_text(aes(label=value)) +
  scale_fill_gradient(low="white", high="orange") + theme_minimal()
```


```{r}
results$uid <- seq(1:nrow(results))
vl <- list(sig=grep("TRUE",results$deseq.sigup.pdc),CTCF_G4=grep("and",results$class),CTCFonly=grep("not",results$class))
plot(euler(vl),quantities=T)
```
 
```{r fig.width=3, fig.height=2.5}
table(results$deseq.sigup.pds,results$deseq.sigup.pdc)
mdf<-reshape2::melt(table(results$deseq.sigup.pds,results$deseq.sigup.pdc))
ggplot(mdf,aes(Var1, Var2, fill= value)) + 
  geom_tile(show.legend = F) + geom_text(aes(label=value)) +
  scale_fill_gradient(low="white", high="orange") + theme_minimal()
```

```{r}
vl <- list(sig_PDS=grep("TRUE",results$deseq.sigup.pds),sig_PDC=grep("TRUE",results$deseq.sigup.pdc),CTCF_G4=grep("and",results$class),CTCFonly=grep("not",results$class))
plot(euler(vl),quantities=T)
```

```{r}
results$uid <- seq(1:nrow(results))
vl <- list(sig_PDS=grep("TRUE",results$deseq.sigup.pds),sig_PDC=grep("TRUE",results$deseq.sigup.pdc),CTCF_G4=grep("and",results$class))
plot(euler(vl),quantities=T)
```
```{r fig.width=6, fig.height=6}
results$sig.by.class.pds <- paste0(results$deseq.sigup.pds,"_",results$class)
results$psize <- 0.01
results$psize[results$deseq.sigup.pds & results$class=="CTCF_and_G4"] <- 1

ggscatter(results,x ="log2.mock",y="log2.pds",size = 0.5, alpha=results$psize,color = "sig.by.class.pds",palette=c(mypal[5],mypal[5],mypal[5],mypal[2],mypal[1]))
```
```{r fig.width=6, fig.height=6}
ggscatter(results[!grepl("NA",results$sig.by.class.pds),],x ="log2.mock",y="deseq.lfc.pds",size = 0.2, alpha="psize",color = "sig.by.class.pds",palette=c("#505050","#505050","red2","blue"))
```

```{r fig.width=5, fig.height=5}
ggscatterhist(results[!grepl("NA",results$sig.by.class.pds),],x ="log2.mock",y="log2.pds",size = 0.4, alpha="psize",color = "sig.by.class.pds",margin.params = list(fill="sig.by.class.pds",color="black",size=0.2),palette=c("#505050","#505050","red2","blue"))
```

```{r fig.width=4, fig.height=4}
ggscatter(results,x ="raw.lfc.pds",y="log2.G4",size = 0.2, alpha=0.1,color="deseq.sigup.pds")
```


```{r fig.width=3, fig.height=3}
results$G4.quantile <- cut_number(results$G4_NT, n = 5,labels=F)
ggstripchart(results,x="G4.quantile", y="deseq.lfc.pds",color=mypal[2],add=c("violin","median_iqr"),add.params=list(color="black",fill=mypal[1]),size=0.2, alpha=results$deseq.sig.pds/4, palette=mypal) + coord_cartesian(ylim=c(-3,3)) + geom_hline(yintercept = 0, linetype="dotted")
ggsave("Violin_CTCF_lfc_by_G4quantile_sig_highlighted.pdf")
```
```{r}
table(results$G4.quantile,results$deseq.sig.pds)
```

```{r fig.width=3, fig.height=3}
ggviolin(results,x="G4.quantile", y="deseq.lfc.pds",fill=mypal[1],add="median_iqr") + coord_cartesian(ylim=c(-2,2)) + geom_hline(yintercept = 0, linetype="dotted")
ggsave("Violin_CTCF_lfc_by_G4quantile.pdf")
```

```{r fig.width=3, fig.height=3}
ggviolin(results,x="G4.quantile", y="log2.G4",fill=mypal2[5],add="median_iqr") + geom_hline(yintercept = 0, linetype="dotted")
ggsave("Violin_G4_by_G4quantile.pdf")
```



```{r fig.width=3, fig.height=3}
peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile("../data/CutNTag_ChIP-Seq/bw/GSM6634325_E14_Mock_G4.bw",peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal)
```

```{r fig.width=3, fig.height=3}
peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile(G4_bigwigs,peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal)
```

```{r fig.width=3, fig.height=3}
plot_bw_profile(mocks_bigwigs[1],peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal)
```




```{r fig.width=12, fig.height=3}
p1 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[1]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
p2 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[2]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
p3 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[3]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
p4 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[4]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)

ggarrange(p1,p2,p3,p4,ncol = 4,nrow = 1)
```

```{r fig.width=12, fig.height=3}
p1 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[1]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
p2 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[2]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
p3 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[3]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
p4 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[4]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)

ggarrange(p1,p2,p3,p4,ncol = 4,nrow = 1)
```
### cen we learn something from the genes with top-most increase in CTCF?
```{r}
sigup.pds <- results[results$deseq.sigup.pds,]
sigup.pdc <- results[results$deseq.sigup.pdc,]

#export.bed(sigup.pds,"peaks_sigup_pds.bed")
#export.bed(sigup.pds,"peaks_sigup_pds.bed")
```


```{r}
gghistogram(results,"width", add="median", fill="class")
```
### Overlap analysis

```{r}
G4_bed <- import('../data/G4_CnT/G4_WT_peaks.narrowPeak')
ATAC_bed <- import('../data/ATAC-seq/ATAC_seq_mESC_Martire_peaks.narrowPeak')
pro_bed <- import('../data/regions/promoters_geneSymbol.mm10.bed')

G4_bed <- bedscout::annotate_overlapping_features(G4_bed,pro_bed,name_field = "name")

G4_bed$name <- "G4"
G4_bed$name[!is.na(G4_bed$nearby_features)] <- "G4pro"
G4_bed$name[grepl("pro",G4_bed$name)] <- "G4pro"
```

```{r}
nearest_G4_1kb <- bedscout::annotate_nearby_features(ctcf,G4_bed,distance_cutoff = 1000,ignore.strand = T,name_field = "name")
nearest_G4_2.5kb <- bedscout::annotate_nearby_features(ctcf,G4_bed,distance_cutoff = 2500,ignore.strand = T,name_field = "name")
nearest_G4_5kb <- bedscout::annotate_nearby_features(ctcf,G4_bed,distance_cutoff = 5000,ignore.strand = T,name_field = "name")
nearest_G4_10kb <- bedscout::annotate_nearby_features(ctcf,G4_bed,distance_cutoff = 10000,ignore.strand = T,name_field = "name")
nearest_G4_50kb <- bedscout::annotate_nearby_features(ctcf,G4_bed,distance_cutoff = 50000,ignore.strand = T,name_field = "name")

ctcf$nearest_G4 <- factor(">50kb", levels=c("<1kb","<2.5kb","<5kb","<10kb","<50kb",">50kb"))
ctcf$nearest_G4[!is.na(nearest_G4_50kb$nearby_features)] <- "<50kb"
ctcf$nearest_G4[!is.na(nearest_G4_10kb$nearby_features)] <- "<10kb"
ctcf$nearest_G4[!is.na(nearest_G4_5kb$nearby_features)] <- "<5kb"
ctcf$nearest_G4[!is.na(nearest_G4_2.5kb$nearby_features)] <- "<2.5kb"
ctcf$nearest_G4[!is.na(nearest_G4_1kb$nearby_features)] <- "<1kb"

ctcf$nearest_G4_type <- "none"
ctcf$nearest_G4_type[!is.na(nearest_G4_50kb$nearby_features)] <- nearest_G4_50kb$nearby_features[!is.na(nearest_G4_50kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_10kb$nearby_features)] <- nearest_G4_10kb$nearby_features[!is.na(nearest_G4_10kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_5kb$nearby_features)] <- nearest_G4_5kb$nearby_features[!is.na(nearest_G4_5kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_2.5kb$nearby_features)] <- nearest_G4_2.5kb$nearby_features[!is.na(nearest_G4_2.5kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_1kb$nearby_features)] <- nearest_G4_1kb$nearby_features[!is.na(nearest_G4_1kb$nearby_features)]

ctcf$nearest_G4_type[grep("pro",ctcf$nearest_G4_type)] <- "G4pro"

results$nearest_G4 <- ctcf$nearest_G4
results$nearest_G4_type <- ctcf$nearest_G4_type
table(results$nearest_G4)
table(results$nearest_G4_type)
table(results$nearest_G4, results$nearest_G4_type)
```
```{r fig.width=3, fig.height=3}
ggviolin(results,x="nearest_G4", y="mean.mock",fill=mypal[1],add="median_iqr") + coord_cartesian(ylim=c(0,10)) + geom_hline(yintercept = 0, linetype="dotted")
ggsave("Violin_CTCF_signal_by_G4distance.pdf")
```

```{r fig.width=3, fig.height=3}
ggviolin(results,x="nearest_G4", y="deseq.lfc.pds",fill=mypal[1],add="median_iqr") + coord_cartesian(ylim=c(-2,2)) + geom_hline(yintercept = 0,linewidth = 0.2 ) + geom_hline(yintercept = median(results$deseq.lfc.pds[results$nearest_G4=="<1kb"]), linetype="dotted",linewidth = 0.2)
ggsave("Violin_CTCF_PDS_lfc_by_G4distance.pdf")
```
```{r}
compare_means(deseq.lfc.pds ~ nearest_G4, results)
```

```{r fig.width=3, fig.height=3}
ggviolin(results,x="nearest_G4", y="deseq.lfc.pdc",fill=mypal[1],add="median_iqr") + coord_cartesian(ylim=c(-2,2)) + geom_hline(yintercept = 0,linewidth = 0.2 ) + geom_hline(yintercept = mean(results$deseq.lfc.pdc[results$nearest_G4=="<1kb"]), linetype="dotted",linewidth = 0.2)
ggsave("Violin_CTCF_PhenDC3_lfc_by_G4distance.pdf")
```

```{r}
compare_means(deseq.lfc.pdc ~ nearest_G4, results)
```
```{r fig.width=5, fig.height=3}
ggviolin(results,x="nearest_G4", y="deseq.lfc.pds",fill="nearest_G4_type",palette=mypal[c(1,3,5)],add="median_iqr") + coord_cartesian(ylim=c(-2,2)) + geom_hline(yintercept = 0,linewidth = 0.2 ) + geom_hline(yintercept = mean(results$deseq.lfc.pds[results$nearest_G4=="<1kb"]), linetype="dotted",linewidth = 0.2)
ggsave("Violin_CTCF_PDS_lfc_by_G4distance_pro.pdf")
```
```{r}
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$nearest_G4_type=="G4",])
```
```{r}
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$nearest_G4_type=="G4pro",])
```
```{r fig.width=5, fig.height=3}
ggviolin(results,x="nearest_G4", y="deseq.lfc.pdc",fill="nearest_G4_type",palette=mypal[c(1,3,5)],add="median_iqr") + coord_cartesian(ylim=c(-2,2)) + geom_hline(yintercept = 0,linewidth = 0.2) + geom_hline(yintercept = mean(results$deseq.lfc.pdc[results$nearest_G4=="<1kb"]), linetype="dotted",linewidth = 0.2)
ggsave("Violin_CTCF_PhenDC3_lfc_by_G4distance_pro.pdf")
```

```{r}
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$nearest_G4_type=="G4",])
```

```{r}
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$nearest_G4_type=="G4pro",])
```
```{r fig.width=3, fig.height=3}
peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile("../data/regions/G4.mm10.plus.bw",peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, colors=mypal)
```

```{r fig.width=3, fig.height=3}
peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile("../data/regions/G4.mm10.minus.bw",peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, colors=mypal)
```
```{r fig.width=4, fig.height=4}
peak_cats_mm9 <- bedscout::import_named_bed_into_list('../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories.mm9lift.bed')
plot_bw_profile("~/Google Drive/Simon/Manuscripts/Puck/github/genome/mm9/predG4.mm9.bw",peak_cats_mm9,labels=c(peak_cats_mm9[[1]][1,]$name,peak_cats_mm9[[2]][1,]$name,peak_cats_mm9[[3]][1,]$name,peak_cats_mm9[[4]][1,]$name),mode="center",show_error = T,verbose=F, colors=mypal)
```
```{r fig.width=4, fig.height=4}
peak_cats_mm9 <- bedscout::import_named_bed_into_list('../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_2_categories.mm9lift.bed')
plot_bw_profile("~/Google Drive/Simon/Manuscripts/Puck/github/genome/mm9/predG4.mm9.bw",peak_cats_mm9,labels=c(peak_cats_mm9[[1]][1,]$name,peak_cats_mm9[[2]][1,]$name),mode="center",show_error = T,verbose=F, colors=mypal,bin_size = 200)
ggsave("Profile_predicted_G4_motif.pdf")
```

```{r fig.width=6, fig.height=4}
p1 <- plot_bw_heatmap("~/Google Drive/Simon/Manuscripts/Puck/github/genome/mm9/predG4.mm9.bw",peak_cats_mm9[[1]],mode="center",verbose=F,max_rows_allowed = 500, zmax=0.3,cmap="Greys") + ggtitle("CTCF-G4")
p2 <- plot_bw_heatmap("~/Google Drive/Simon/Manuscripts/Puck/github/genome/mm9/predG4.mm9.bw",peak_cats_mm9[[2]],mode="center",verbose=F,max_rows_allowed = 500, zmax=0.3,cmap="Greys") + ggtitle("CTCF-only")
ggarrange(p1,p2)
ggsave("Heatmap_predicted_G4_motif.pdf")
```